#############################################
# ALLOCATES CENSUS UNIT IDs TO CENSUS TRACTS
#############################################
# Census Units are census tracts of minimum size (merged, if necessary)
# - Census tracts that are too small are merged with neighbors
#   - with whom they share the longest border
#   - that are located within the same municipality
# Note: this script doesn't do geometric operations
# It only assigns numbers of census units to census tracts,
# which can later be dissolved using ArcGIS
# based on code written by Christoph Nolte
# questions to bowydenbraber@gmail.com



library(xlsReadWrite)
library(foreign)

#read in files from previous document-Bowy:
d<-read.dbf("BraCens2010 - Key Data.dbf")


# Parameters for merging year-2000 census tracts
p <- data.frame(threshold=50000000, f_fid="CT00_FID", f_codigo="CODIGO", f_area="F_AREA_EQ", file_shp="CT00_NO.dbf", file_bnd="-/CT00_NO_Lines.dbf", file_out = "CT_CU00_M50.dbf", folder=ff("folder_name"), stringsAsFactors=FALSE)

# Parameters for merging year-2010 census tracts
#p <- data.frame(threshold=0.0025, f_fid="CT_FID", f_codigo="CD_GEOCODI", f_area="F_AREA", file_out=ff("Data/GIS/Processed/Population/Census/CT_CU10_0.0025.dbf"), file_shp=ff("Data/GIS/Processed/Population/Census/-/CT10_PD.dbf"), file_bnd=ff("Data/GIS/Processed/Population/Census/-/CT10_P_Lines.dbf"), , folder="C:/Users/chrnolte/Documents/Data/GIS/Processed/Population/Census/", stringsAsFactors=FALSE)

# CENSUS TRACT BOUNDARIES
# Read DBF of boundary file
bnd <- read.dbf(paste(p$folder,p$file_bnd,sep=""))
# Remove outer boundaries (boundaries that are lacking one neighbor)
bnd <- bnd[bnd$LEFT_FID > -1 & bnd$RIGHT_FID > -1,]
# Extract unique IDs of all census tracts
fids <- union(unique(bnd$LEFT_FID),unique(bnd$RIGHT_FID))

# CENSUS TRACT AREAS
# Read DBF of census tract shapefile
shp.orig <- read.dbf(paste(p$folder,p$file_shp,sep=""))
# Subselect & rename columns
shp.orig <- shp.orig[,c(p$f_fid, p$f_codigo, p$f_area)]
colnames(shp.orig) <- c("FID", p$f_codigo, "area")
# Extract municipality code
shp.orig$mun <- substr(shp.orig[,p$f_codigo],1,7)

# Subset census tract shapefile
shp <- shp.orig[,c("FID","area","mun")]
# Remove census tracts without neighbors
# (Not sure whether there are any census tracts without neighbor...)
z.nn <- which(shp$FID %in% setdiff(shp$FID,fids))
if(length(z.nn)>0) shp <- shp[-z.nn,]

# The "kids" variable is a list of strings which contains the IDs of all census tracts that have been merged into this one
# Initialize kids (each census tract is its own kid)
shp$kids <- as.character(shp$FID)

# Initalize counters
# fid.counter: generates new IDs (for merged shapes)
fid.counter <- max(shp$FID) + 1
# i.max: maximum number of repetitions
i.max <- nrow(shp[shp$area < p$threshold,])
# Loop counter
i <- 1

# As long as the smallest area in the dataset is smaller than the threshold --> continue merging
while (min(shp$area) < p$threshold & i <= i.max) {
  
  # FIND SMALLEST SHAPE
  z <- which.min(shp$area)
  # Store ID and municipality code
  fid <- shp$FID[z]
  fid.mun <- shp$mun[z]
  
  # FIND ITS "MOM": NEIGHBOR WITHIN THE NAME MUNICIPALITY WITH WHICH IT SHARES THE LONGEST BOUNDARY
  
  # Create dataset of all neighbor boundaries
  l <- bnd[bnd$LEFT_FID == fid & !(bnd$RIGHT_FID == fid),]
  colnames(l) <- c("me", "you", "l")
  r <- bnd[bnd$RIGHT_FID == fid & !(bnd$LEFT_FID == fid),]
  colnames(r) <- c("you", "me", "l")
  x <- rbind(l,r)[,c("you","l")]
  
  # Sum up boundaries by neighbor
  x <- aggregate(x, by=list(x$you), FUN=sum)[,c(1,3)]
  
  # Select boundaries within same municipality
  x <- merge(x,shp,by.x="Group.1",by.y="FID")
  x.mun <- x[x$mun==fid.mun,]
  if(!(nrow(x.mun)>0)) stop(paste("FID",fid,"has no neighbor in the same municipality!"))
  
  # Identify FID of "mom" (neighbor with largest shared boundary)
  fid.mom <- x.mun[which.max(x.mun$l),1]
  z.mom <- which(shp$FID==fid.mom)
  
  # Create entry for "merged" shape with
  # 1) New ID 2) Area: sum of kid and mom 3) kids: kids of "kid" and "mom"
  rw <- data.frame(FID=fid.counter, area=shp[z.mom,"area"]+shp[z,"area"], mun=fid.mun, kids = paste(shp[z.mom,"kids"],shp[z,"kids"],sep=","))
  shp <- rbind(shp[-c(z,z.mom),],rw) #delete kids, add mom
  bnd[bnd$LEFT_FID %in% c(fid,fid.mom),"LEFT_FID"] <- fid.counter # update fid in boundaries
  bnd[bnd$RIGHT_FID %in% c(fid,fid.mom),"RIGHT_FID"] <- fid.counter # update fid in boundaries       
  fid.counter <- fid.counter + 1 # next ID
  i <- i + 1
}

# MERGE NEW CENSUS UNIT IDS ONTO ORIGINAL SHAPE
rownames(shp.orig) <- shp.orig$FID
shp.orig$CU_ID <- shp.orig$FID
for (j in 1:nrow(shp)) {
  shp.i <- shp[j,]
  kids <- as.numeric(strsplit(shp.i$kids,split=",")[[1]])
  for (kid in kids) {
    shp.orig[as.character(kid),"CU_ID"] <- shp.i$FID
  }
}

write.dbf(shp.orig[,c("FID", p$f_codigo, "CU_ID")], paste(p$folder, p$file_out, sep=""))









